Code
library(edgeR)
library(dplyr)
library(tidyr)
library(ggplot2)
library(scuttle)
library(pheatmap)
library(patchwork)
library(ggbeeswarm)
library(EnhancedVolcano)
library(SingleCellExperiment).volcano <- \(df, title, fdr = 0.05, lfc = 1) {
EnhancedVolcano(df,
x = "logFC", y = "FDR",
FCcutoff = lfc, pCutoff = fdr,
pointSize = 1, raster = TRUE,
title = title, subtitle = NULL,
lab = df[["gene"]], labSize = 2,
drawConnectors = TRUE, widthConnectors = 0.5) +
guides(col = guide_legend(override.aes = list(alpha = 1, size = 3))) +
theme_bw(9) + theme(
aspect.ratio = 1,
legend.title = element_blank(),
panel.grid.minor = element_blank())
}# setup subtype groupings
names(tum) <- tum <- c("RCC", "LSCC")
names(kid) <- kid <- c("Kidney", "RCC")
names(lun) <- lun <- c("Alveoles", "LSCC")
names(tls) <- tls <- c("E_TLS", "SFL_TLS", "PFL_TLS", "Tcell_TLS")
# split data by tumor type & TLS
dat <- c(list(all = sub,
RCC = sub[, sub$TumorType == "RCC"],
LSCC = sub[, sub$TumorType == "LSCC"]),
lapply(tls, \(.) sub[, sub$TissueSub == .]))ref <- c(
all = unname(kid[1]),
RCC = unname(kid[1]),
LSCC = unname(lun[1]),
sapply(tls, \(.) "LSCC"))
names(ids) <- ids <- names(dat)
dat <- dat[ids]; ref <- ref[ids]
fit <- lapply(ids, \(.) {
# setup design matrix
df <- data.frame(colData(dat[[.]]))
if (. %in% tls) {
tt <- factor(df$TumorType)
df$TumorType <- relevel(tt, ref[.])
mm <- model.matrix(~0+TumorType, df)
colnames(mm) <- gsub("TumorType", "", colnames(mm))
} else {
st <- droplevels(df$TissueSub)
df$TissueSub <- relevel(st, ref[.])
mm <- model.matrix(~0+TissueSub, df)
colnames(mm) <- gsub("TissueSub", "", colnames(mm))
}
# fit GLM model
dgl <- DGEList(assay(dat[[.]]))
dgl <- calcNormFactors(dgl)
dgl <- estimateDisp(dgl, mm)
fit <- glmQLFit(dgl, mm)
})# setup contrasts
cs <- c(list(
# within tumor types
RCC = c(
# normal/tumor vs. any TLS
list(TLS = list(kid, tls)),
# normal/tumor vs. specific TLS
lapply(tls, \(t) list(kid, t)),
# TLS maturation stages
list(
SFL = list("E_TLS", "SFL_TLS"),
Tcell = list("E_TLS", "Tcell_TLS"))),
LSCC = c(
# normal/tumor vs. any TLS
list(TLS = list(lun, tls)),
# normal/tumor vs. specific TLS
lapply(tls, \(t) list(lun, t)),
# TLS maturation stages
list(
SFL = list("E_TLS", "SFL_TLS"),
Tcell = list("E_TLS", "Tcell_TLS")))),
# kidney/RCC vs. lung/LSCC
list(all = list(parenchyma = list(kid, lun))),
# across tumor types, within TLS subsets
lapply(tls, \(.) setNames(list("RCC"), .)))
cs <- lapply(ids, \(.) {
x <- numeric(ncol(mm <- fit[[.]]$design))
lapply(cs[[.]], \(c) {
if (length(c) == 1) {
i <- match(c, colnames(mm))
x[i] <- 1; x[-i] <- -1
} else {
a <- match(c[[1]], colnames(mm))
b <- match(c[[2]], colnames(mm))
x[a] <- -1/sum(a != 0)
x[b] <- 1/sum(b != 0)
}
return(x)
})
})# run DGE analysis
res <- lapply(ids, \(.) {
lapply(names(cs[[.]]), \(c) {
ht <- glmQLFTest(fit[[.]], contrast = cs[[.]][[c]])
tt <- topTags(ht, n = Inf)$table
data.frame(row.names = NULL,
gene = rownames(tt), tt,
contrast = c, subset = .)
}) |> do.call(what = rbind)
}) |> do.call(what = rbind)
rownames(res) <- NULLdf <- res[res$contrast %in% c("SFL", "Tcell"), ]
ps <- lapply(unique(df$subset), \(s) {
lapply(unique(df$contrast), \(c) {
df <- res[res$subset == s & res$contrast == c, ]
.volcano(df, fdr = 1e-4, lfc = 1.25, title = paste(s, c, sep = ","))
})
})
wrap_plots(Reduce(c, ps), nrow = 2) +
plot_layout(guides = "collect") &
theme(legend.position = "top")top <- res %>%
filter(FDR < 0.05) %>%
filter(subset == "all") %>%
slice_max(abs(logFC), n = 80)
mtx <- logcounts(sub)[top$gene, sub$TissueSub %in% c(kid, lun)]
cd <- data.frame(colData(sub))[c("TissueSub")]
hm <- pheatmap(mtx,
main = "parenchyma", fontsize = 6,
col = rev(hcl.colors(51, "RdBu")),
scale = "row", show_colnames = FALSE, annotation_col = cd)top <- res %>%
filter(!subset %in% tls) %>%
group_by(subset) %>%
filter(FDR < 0.1, contrast == "TLS") %>%
slice_max(abs(logFC), n = 40) %>%
split(.$subset)
for (. in names(top)) {
cat("####", ., "\n")
mtx <- logcounts(sub)[top[[.]]$gene, sub$TumorType == .]
cd <- data.frame(colData(sub))[c("TissueSub")]
hm <- pheatmap(mtx,
main = ., fontsize = 6,
col = rev(hcl.colors(51, "RdBu")),
scale = "row", show_colnames = FALSE, annotation_col = cd)
print(hm); cat("\n\n")
}top <- res %>%
group_by(subset) %>%
filter(contrast %in% c("SFL", "Tcell")) %>%
slice_max(abs(logFC), n = 40) %>%
split(.$subset)
for (. in names(top)) {
cat("####", ., "\n")
idx <- sub$TumorType == . & sub$TissueSub %in% c(
"E_TLS", paste0(unique(top[[.]]$contrast), "_TLS"))
mtx <- logcounts(sub)[top[[.]]$gene, idx]
cd <- data.frame(colData(sub))[c("TissueSub")]
hm <- pheatmap(mtx,
main = ., fontsize = 6,
col = rev(hcl.colors(51, "RdBu")),
scale = "row", show_colnames = FALSE, annotation_col = cd)
print(hm); cat("\n\n")
}top <- res %>%
filter(subset %in% tls) %>%
group_by(subset) %>%
slice_max(abs(logFC), n = 25) %>%
split(.$subset)
for (. in names(top)) {
cat("####", ., "\n")
plt <- ggplot(
filter(gg,
TissueSub == .,
gene %in% top[[.]]$gene),
aes(TumorType, expr, fill = TumorType)) +
facet_wrap(~ gene, scales = "free_y") +
geom_boxplot(size = 0.1, fill = NA, outlier.color = NA, show.legend = FALSE) +
geom_beeswarm(shape = 21, col = "black", stroke = 0.1, size = 1.2, alpha = 0.8) +
guides(fill = guide_legend(override.aes = list(size = 3, alpha = 1))) +
labs(x = NULL, y = "Expression (logCPM)") +
scale_fill_brewer(palette = "Set2") +
theme_linedraw(9) + theme(
panel.grid = element_blank(),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
strip.background = element_blank(),
legend.key.size = unit(0.5, "lines"),
strip.text = element_text(color = "black", face = "bold"))
print(plt); cat("\n\n")
}R version 4.3.2 (2023-10-31)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 20.04.6 LTS
Matrix products: default
BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
locale:
[1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
[3] LC_TIME=de_CH.UTF-8 LC_COLLATE=en_US.UTF-8
[5] LC_MONETARY=de_CH.UTF-8 LC_MESSAGES=en_US.UTF-8
[7] LC_PAPER=de_CH.UTF-8 LC_NAME=C
[9] LC_ADDRESS=C LC_TELEPHONE=C
[11] LC_MEASUREMENT=de_CH.UTF-8 LC_IDENTIFICATION=C
time zone: Europe/Zurich
tzcode source: system (glibc)
attached base packages:
[1] stats4 stats graphics grDevices utils datasets methods
[8] base
other attached packages:
[1] EnhancedVolcano_1.18.0 ggrepel_0.9.3
[3] ggbeeswarm_0.7.2 patchwork_1.1.3
[5] pheatmap_1.0.12 scuttle_1.10.2
[7] SingleCellExperiment_1.22.0 SummarizedExperiment_1.30.2
[9] Biobase_2.60.0 GenomicRanges_1.52.0
[11] GenomeInfoDb_1.36.3 IRanges_2.34.1
[13] S4Vectors_0.38.1 BiocGenerics_0.46.0
[15] MatrixGenerics_1.12.3 matrixStats_1.0.0
[17] ggplot2_3.4.3 tidyr_1.3.0
[19] dplyr_1.1.3 edgeR_3.42.4
[21] limma_3.56.2
loaded via a namespace (and not attached):
[1] beeswarm_0.4.0 gtable_0.3.4
[3] xfun_0.40 htmlwidgets_1.6.2
[5] lattice_0.22-5 Cairo_1.6-1
[7] vctrs_0.6.3 tools_4.3.2
[9] bitops_1.0-7 generics_0.1.3
[11] parallel_4.3.2 tibble_3.2.1
[13] fansi_1.0.4 pkgconfig_2.0.3
[15] Matrix_1.6-1 RColorBrewer_1.1-3
[17] sparseMatrixStats_1.12.2 lifecycle_1.0.3
[19] GenomeInfoDbData_1.2.10 farver_2.1.1
[21] compiler_4.3.2 munsell_0.5.0
[23] codetools_0.2-19 vipor_0.4.5
[25] htmltools_0.5.6 RCurl_1.98-1.12
[27] yaml_2.3.7 pillar_1.9.0
[29] crayon_1.5.2 BiocParallel_1.34.2
[31] DelayedArray_0.26.7 abind_1.4-5
[33] tidyselect_1.2.0 locfit_1.5-9.8
[35] digest_0.6.33 purrr_1.0.2
[37] labeling_0.4.3 splines_4.3.2
[39] fastmap_1.1.1 grid_4.3.2
[41] colorspace_2.1-0 cli_3.6.1
[43] magrittr_2.0.3 S4Arrays_1.0.6
[45] utf8_1.2.3 withr_2.5.1
[47] DelayedMatrixStats_1.22.6 scales_1.2.1
[49] rmarkdown_2.24 XVector_0.40.0
[51] beachmat_2.16.0 evaluate_0.21
[53] ggrastr_1.0.2 knitr_1.44
[55] rlang_1.1.1 Rcpp_1.0.11
[57] glue_1.6.2 rstudioapi_0.15.0
[59] jsonlite_1.8.7 R6_2.5.1
[61] zlibbioc_1.46.0